gorgonia.org/gorgonia@v0.9.17/cuda modules/compile.py (about)

     1  import glob
     2  from subprocess import call
     3  from os.path import basename, splitext
     4  
     5  compute = [30, 32, 35, 37, 50, 52, 53, 60, 61, 62]
     6  def main():
     7  	src = glob.glob("src/*.cu")
     8  	cmd = ['nvcc', '-o=', '-arch=', '-ptx', '-Xptxas', '-allow-expensive-optimizations']
     9  	# slow
    10  	cmdslow = cmd[:]
    11  	cmdslow.extend(['-fmad=false', '-ftz=false', '-prec-div=true', '-prec-sqrt=true', "INPUTFILE"])
    12  
    13  	for f in src:
    14  		name, ext = splitext(basename(f))
    15  		for cc in compute:
    16  			cmdslow[1] = '-o="target/' + name + '_cc' + str(cc) + '.ptx"'
    17  			cmdslow[-1] = f
    18  			cmdslow[2] = '-arch=compute_'+str(cc)
    19  			print(cmdslow)
    20  			call(cmdslow)
    21  
    22  	#fast
    23  	cmdfast = cmd[:]
    24  	cmdfast.extend(['-fmad=false', '-use_fast_math', "INPUTFILE"])
    25  
    26  if __name__ == '__main__':
    27  	main()